getwd()
setwd("C:/Users/jmweiss/Documents/ecol 562")
alligators<-read.csv('alligators.csv')
alligators[1:12,]

#create tabulated version of the data
table(alligators$food,alligators$lake,alligators$size,alligators$gender)
alli.dat<-data.frame(table(alligators$food,alligators$lake,alligators$size,alligators$gender))
dim(alli.dat)
alli.dat[1:8,]
names(alli.dat)[1:4]<-c('food','lake','size','gender')
alli.dat[1:8,]

library(nnet)
#null model
fit0<-multinom(food~1,data=alligators)
logLik(fit0)
#gender model
fit1<-multinom(food~gender,data=alligators)
anova(fit0,fit1)

#fitting the same model using the tabulated data set
fit0a<-multinom(food~1,data=alli.dat,weight=Freq)
fit1a<-multinom(food~gender,data=alli.dat,weight=Freq)
anova(fit0a,fit1a)

#fit additional main effects models
fit1<-multinom(food~gender,data=alligators)
fit2<-multinom(food~size,data=alligators)
fit3<-multinom(food~lake,data=alligators)
AIC(fit1,fit2,fit3)

#test effects of size and lake
anova(fit0,fit2)
anova(fit0,fit3)

#fit models additive in two variables
fit4<-multinom(food~lake+size,data=alligators)
fit5<-multinom(food~lake+gender,data=alligators)
fit6<-multinom(food~size+gender,data=alligators)
AIC(fit4,fit5,fit6)
#test effect of lake controlling for size
anova(fit2,fit4)
#test effect of size controlling for lake
anova(fit3,fit4)

#model additive in all three variables
fit7<-multinom(food~lake+size+gender,data=alligators)
#test the effect of gender controlling for lake and size
anova(fit4,fit7)

summary(fit4)
names(summary(fit4))
#calcualte z-statistic for effects
summary(fit4)$coefficients/summary(fit4)$standard.errors

#fit multinomial models using a Poisson distribution
#null model
glm0<-glm(Freq~size*gender*lake+food,data=alli.dat,family=poisson)
#gender only model
glm1<-glm(Freq~size*gender*lake+food+food:gender,data=alli.dat,family=poisson)
#test statistic is identical to multinomial test
anova(glm0,glm1,test='Chisq')

#Poisson model estimates a lot of extraneous effects
summary(glm1)
summary(fit1)

#saturated model
fitS<-multinom(food~lake*size*gender,data=alligators)
#test if there is a significant lack of fit
anova(fit4,fitS)

#saturated Poisson model has residual deviance = 0
glmS<-glm(Freq~size*gender*lake+food+food*gender*size*lake,data=alli.dat,family=poisson)
anova(glmS)

#create data frame for use in obtaining predictions
mydat<-data.frame(table(alligators$lake,alligators$size,alligators$gender))
names(mydat)[1:3]<-c('lake','size','gender')
out.p<-predict(fit4,type='probs',newdata=mydat[,1:3])
out.p
#check that rows sum to 1
apply(out.p,1,sum)
#obtain expected values under the estimated model
sapply(1:5,function(x) out.p[,x]*mydat$Freq)->out.freq
out.freq
#check that rows sum to the observed frequencies
apply(out.freq,1,sum)

#restructure the observed counts as a matrix organized like the expected values
alli.dat[1:8,]
out.obs<-matrix(alli.dat[,5],ncol=5,byrow=T)
out.obs

#G-test: LR test with the saturated model
gtest.func<-function(O,E) {
result<-ifelse(O==0,0,2*O*log(O/E))
sum(result)}

# try test on observed and expected values
gtest.func(as.vector(out.obs),as.vector(out.freq))
# should match output from anova
anova(fit4,fitS)

#percentage of expected values less than 5
sum(out.freq<5)/length(as.vector(out.freq))

my.simulation<-function(y) {
out.sim<-
t(sapply(1:16,function(x) rmultinom(1,size=mydat$Freq[x],prob=out.p[x,])))
gtest.func(as.vector(out.sim),as.vector(out.freq))
}

#obtain simulation-based p-value of goodness of fit (GOF) test
set.seed(10)
sapply(1:999,my.simulation)->results

#calculate actual GOF statistic
gtest.func(as.vector(out.obs),as.vector(out.freq))->actual
actual
#add it to simulated values
final.results<-c(actual,results)
#calculate p-value
sum(final.results>=actual)/1000

#display results in a histogram
hist(final.results)
points(actual,0,col=2,pch=8)

#lake + size model fit as a Poisson model
glm4<-glm(Freq~size*gender*lake+food+food:lake+food:size,data=alli.dat,family=poisson)
summary(glm4)$coefficients->out.coef
out.coef
#locate coefficients of interest in the summary table
grep('food',rownames(out.coef))
out.coef[grep('food',rownames(out.coef)),]

#if GOF rejected we can estimate a quasi-Poisson model
glm4a<-glm(Freq~size*gender*lake+food+food:lake+food:size,data=alli.dat,family=quasipoisson)
summary(glm4a)$coefficients->out.coefa
out.coefa[grep('food',rownames(out.coefa)),]

#estimates are the same but the standard errors are larger
out.coefa[grep('food',rownames(out.coefa))[5:10],]
out.coef[grep('food',rownames(out.coef))[5:10],]

#quasi-Poisson model is not a likelihood-based model
AIC(glm4)
AIC(glm4a)

##### ordinal logistic regression #####

trees<-read.csv('trees.csv')
dim(trees)
trees[1:8,]
#extract data for flowering dogwood
cornus<-trees[trees$SPP=='CORNFLO',]
dim(cornus)
#not all cover values are represented
table(cornus$truecover)

#proportional odds model using MASS package
library(MASS)
fit1<-polr(ordered(truecover)~W,data=cornus)
fit1

#proportional odds model using VGAM package
library(VGAM)
fit1a<-vglm(factor(truecover)~W,data=cornus,family=cumulative(reverse=T,parallel=T))
fit1a
#can fit cumulative logits with separate coefficients for predictor

fit1a<-vglm(factor(truecover)~W,data=cornus,family=cumulative(reverse=T,parallel=F))

#model does not converge--need to collapse categories
cornus$ord2 <- ifelse(cornus$truecover<4, 1, ifelse(cornus$truecover>5,6,cornus$truecover))
table(cornus$truecover) 
table(cornus$ord2)

fit1a<-vglm(factor(ord2)~W,data=cornus,family=cumulative(reverse=T,parallel=T))
fit1b<-vglm(factor(ord2)~W,data=cornus,family=cumulative(reverse=T,parallel=F~W))

#models can be compared using analysis of deviance to test proportional odds assumption
deviance(fit1a)
deviance(fit1b)

#test suggests ordinal model does not fit
1-pchisq(deviance(fit1a)-deviance(fit1b),df=df.residual(fit1a)-df.residual(fit1b))